# Principal component analysis of the penguin size measurements.
penguin_pca <- penguins %>%
  # keep body mass plus every column whose name ends in "_mm"
  select(body_mass_g, ends_with("_mm")) %>%
  # remove rows with a missing value in any of the selected columns
  drop_na() %>%
  # centre and scale each variable so they contribute on a common scale
  scale() %>%
  prcomp()

# Loadings: contribution of each original variable to each principal component
penguin_pca[["rotation"]]
## PC1 PC2 PC3 PC4
## body_mass_g 0.5483502 0.084362920 -0.5966001 -0.5798821
## bill_length_mm 0.4552503 0.597031143 0.6443012 -0.1455231
## bill_depth_mm -0.4003347 0.797766572 -0.4184272 0.1679860
## flipper_length_mm 0.5760133 0.002282201 -0.2320840 0.7837987
# Rows of `penguins` with no missing values in the PCA variables; this is the
# same row filter that was applied before running the PCA above.
penguin_complete <- drop_na(penguins, body_mass_g, ends_with("_mm"))

# Biplot: observations placed on PC1 and PC2, coloured by species, with the
# variable loadings drawn and labelled on top.
autoplot(
  penguin_pca,
  data = penguin_complete,
  colour = "species",
  loadings = TRUE,
  loadings.label = TRUE
) +
  theme_minimal()
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# PC1 explains almost 69% of the variance and PC2 explains 19%; the biplot also shows that Adelie and Chinstrap penguins overlap in multivariate space
# Read the landings workbook, tidy it, and keep only the public records.
fish_noaa <- here("data", "foss_landings.xlsx") %>%
  read_excel() %>%
  clean_names() %>%
  mutate(
    # lowercase every character column
    across(where(is.character), tolower),
    # keep each name up to its 4th-from-last character,
    # i.e. drop the final three characters of nmfs_name
    nmfs_name = str_sub(nmfs_name, end = -4)
  ) %>%
  # values were lowercased above, so compare against lowercase "public"
  filter(confidentiality == "public")
# Make a customized graph:
# Line chart of pounds by year, one coloured line per nmfs_name.
# The legend is hidden on the line layer.
fish_plot <- ggplot(fish_noaa, mapping = aes(x = year, y = pounds)) +
  geom_line(mapping = aes(colour = nmfs_name), show.legend = FALSE) +
  theme_minimal()

fish_plot
## Warning: Removed 6 row(s) containing missing values (geom_path).

# Interactive (plotly) version of the same chart
ggplotly(fish_plot)
### Use gghighlight to highlight certain series
# Line chart of pounds by year with one line per nmfs_name, where only the
# series that ever exceed 1e8 pounds are highlighted; gghighlight greys out
# every other series and applies the colour scale to the highlighted ones only.
ggplot(data = fish_noaa, aes(x = year, y = pounds, group = nmfs_name)) +
  geom_line(aes(color = nmfs_name)) +
  theme_minimal() +
  # `pounds` contains missing values. `max(pounds) > 1e8` is NA for any series
  # with a missing year, and an NA predicate means the series is never
  # highlighted. Testing the non-missing values directly avoids that.
  gghighlight(any(pounds > 1e8, na.rm = TRUE))
## label_key: nmfs_name
## Warning: Removed 6 row(s) containing missing values (geom_path).
# Read the daily electricity CSV directly from the data.bloomington.in.gov URL
monroe_wt <- read_csv(
  file = "https://data.bloomington.in.gov/dataset/2c81cfe3-62c2-46ed-8fcf-83c1880301d1/resource/13c8f7aa-af51-4008-80a9-56415c7c931e/download/mwtpdailyelectricitybclear.csv"
)
## Parsed with column specification:
## cols(
## date = col_character(),
## kWh1 = col_double(),
## kW1 = col_double(),
## kWh2 = col_double(),
## kW2 = col_double(),
## solar_kWh = col_double(),
## total_kWh = col_double(),
## MG = col_double()
## )
# Add month information to the electricity records.
monroe_ts <- monroe_wt %>%
  mutate(
    # parse the month-day-year text into a Date
    date = mdy(date),
    # month number of each record
    record_month = month(date),
    # abbreviated month name looked up from the month number
    month_name = month.abb[record_month],
    # order the factor levels by month number rather than alphabetically
    month_name = fct_reorder(month_name, record_month)
  )

# Jittered points of total_kWh for each month
ggplot(monroe_ts, mapping = aes(x = month_name, y = total_kWh)) +
  geom_jitter()
# Scatter plot: flipper length against body mass
graph_a <- ggplot(penguins, mapping = aes(x = body_mass_g, y = flipper_length_mm)) +
  geom_point()

# Jitter plot: flipper length by species, coloured by species, legend hidden
graph_b <- ggplot(penguins, mapping = aes(x = species, y = flipper_length_mm)) +
  geom_jitter(mapping = aes(colour = species), show.legend = FALSE)

graph_a
## Warning: Removed 2 rows containing missing values (geom_point).
# Combining plots:
#   `|` places plots next to each other
#   `/` stacks one plot on top of another
# Here graph_a and graph_b share the top row and fish_plot fills the bottom row.
(graph_a | graph_b) / fish_plot
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 6 row(s) containing missing values (geom_path).
# Same layout as above; `&` applies theme_dark() to every plot in the composition.
graph_c <- ((graph_a | graph_b) / fish_plot) & theme_dark()
graph_c
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 6 row(s) containing missing values (geom_path).

# Save the combined figure to a file. The plot is passed explicitly so the
# output does not depend on which plot happened to be displayed last
# (ggsave() otherwise falls back to last_plot()).
ggsave(
  here("fig", "graph_c_at.png"),
  plot = graph_c,
  width = 5,
  height = 6
)
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 6 row(s) containing missing values (geom_path).